##  Replication code for BJPS paper
##  May 11, 2021
##  For questions please contact Holger Kern (hkern@fsu.edu)


setwd("~/Dropbox/Old projects/Russia I/data")
rm(list = ls())



##  helper function
hl3 <- function(dat, digits = 3)	{

	dat.new <- rep(NA,2)

	sym <- ""
	tvalue <- abs(as.numeric(dat[1]) / as.numeric(dat[2]))

	if(tvalue >= qnorm(.95))  sym <- "^{*}"
	if(tvalue >= qnorm(.975)) sym <- "^{**}"
	if(tvalue >= qnorm(.995)) sym <- "^{***}"

	dat.new[1] <- 
	paste("$", format(round(as.numeric(dat[1]), digits = digits),
	scientific = FALSE, nsmall = digits), sym[], "$", sep = "")

	dat.new[2] <- 
	paste("$(", format(round(as.numeric(dat[2]), digits = digits),
	scientific = FALSE, nsmall = digits),")$", sep = "")

	return(dat.new)

									}


library(Hmisc)
library(sandwich)
library(readxl)
library(MASS)
library(blockTools)
library(corrplot)

load("Russia replication data.RData")



##
##  RANDOMIZATION INFERENCE
##
##  table of results: show cell proportions 
Tr <- dat.outcomes$Tr

tab.raw <- matrix(NA,4,12)
rownames(tab.raw) <- c(	"Non-political",
						"Anti-regime",
						"Anti-regime with repression frame","p-value")

outcomes <- c("accept","price_list","reject","noreply","posrequest","negrequest")

k <- 1
for(k in 1:6)	{

	y <- unlist(dat.outcomes[outcomes[k]])
	tab.raw[1:3,(((k*2)-1):(k*2))] <- 	matrix(table(Tr,y)[,2] /
										apply(table(Tr,y),1,sum), 3, 2, byrow = TRUE)


	##  observed test statistic
	temp <- lm(y ~ Tr)
	stat.obs <- as.vector(temp$coef[-1] %*% temp$coef[-1])
	stat.rand <- rep(NA,100000)

	m <- 1
	for(m in 1:100000)	{

		temp <- lm(y ~ as.factor(Tr.rand[,m]))
		stat.rand[m] <- temp$coef[-1] %*% temp$coef[-1]
		if(m %% 10000 == 0)	{ cat(m, "\n") }

						}

	cat(k,"\n","\n")
	tab.raw[4,(2*k)] <- sum(stat.rand >= stat.obs) / ncol(Tr.rand)

				}

round(tab.raw,2)

tab.raw <- rbind(tab.raw, NA)
tab.raw[5,c(2,4,6,8,10,12)] <- apply(dat.outcomes[outcomes],2,mean)
latex(round(tab.raw,2), file = "")



##  joint randomization test over all outcomes following Alwyn 2019
temp1 <- lm(unlist(dat.outcomes[outcomes[1]]) ~ dat.outcomes$Tr)
temp2 <- lm(unlist(dat.outcomes[outcomes[2]]) ~ dat.outcomes$Tr)
temp3 <- lm(unlist(dat.outcomes[outcomes[3]]) ~ dat.outcomes$Tr)
temp4 <- lm(unlist(dat.outcomes[outcomes[4]]) ~ dat.outcomes$Tr)
temp5 <- lm(unlist(dat.outcomes[outcomes[5]]) ~ dat.outcomes$Tr)
temp6 <- lm(unlist(dat.outcomes[outcomes[6]]) ~ dat.outcomes$Tr)

stat.obs <- 	c(	temp1$coef[-1], temp2$coef[-1], temp3$coef[-1], temp4$coef[-1],
					temp5$coef[-1], temp6$coef[-1])


stat.rand <- matrix(NA,100000,30)

m <- 1
for(m in 1:100000)	{

	temp1 <- lm(unlist(dat.outcomes[outcomes[1]]) ~ as.factor(Tr.rand[,m]))
	temp2 <- lm(unlist(dat.outcomes[outcomes[2]]) ~ as.factor(Tr.rand[,m]))
	temp3 <- lm(unlist(dat.outcomes[outcomes[3]]) ~ as.factor(Tr.rand[,m]))
	temp4 <- lm(unlist(dat.outcomes[outcomes[4]]) ~ as.factor(Tr.rand[,m]))
	temp5 <- lm(unlist(dat.outcomes[outcomes[5]]) ~ as.factor(Tr.rand[,m]))
	temp6 <- lm(unlist(dat.outcomes[outcomes[6]]) ~ as.factor(Tr.rand[,m]))

	stat.rand[m,] <- c( temp1$coef[-1], temp2$coef[-1], temp3$coef[-1], temp4$coef[-1],
				  		temp5$coef[-1], temp6$coef[-1])

	if(m %% 1000 == 0)	{ cat(m, "\n") }

					}

V <- var(stat.rand)
V2 <- ginv(V)


stat.obs2 <- as.vector(t(stat.obs) %*% V2 %*% stat.obs)

stat.rand2 <- rep(NA,100000)
m <- 1
for(m in 1:100000)	{	stat.rand2[m] <- t(stat.rand[m,]) %*% V2 %*% stat.rand[m,]	}

round(sum(stat.rand2 > stat.obs2) / ncol(Tr.rand),2)

rm(V,V2,stat.rand,stat.rand2,stat.obs,stat.obs2,temp1,temp2,temp3,temp4,temp5,temp6,temp)
rm(tab.raw,m,k,y,Tr)



##########
##  PROBIT
##########
setwd("plots")

##  we run probit models and create plots of predicted probabilities as well as 
##  treatment effects
tab.probit <- matrix(NA,6,8)

##  choose outcome to model; run the code below 4 times with different outcomes
dat.outcomes$y <- dat.outcomes$acceptprice; main <- "Positive reply"; i <- 1
dat.outcomes$y <- dat.outcomes$rejectnoreply; main <- "Negative reply"; i <- 2
dat.outcomes$y <- dat.outcomes$posrequest; main <- "Technical request"; i <- 3
dat.outcomes$y <- dat.outcomes$negrequest; main <- "Political request"; i <- 4


glm.out <- glm(y ~ 	Tr, data = dat.outcomes, family = binomial(link = "probit"))
summary(glm.out)

tab.probit[,((i*2)-1):(i*2)] <- summary(glm.out)$coef[,1:2]
rownames(tab.probit) <- rownames(summary(glm.out)$coef)


X.pred <- model.matrix(glm.out)

sims <- 10000
out <- matrix(NA,sims,6)
colnames(out) <- paste("Tr", 1:6, sep = "")

set.seed(123)
N <- 1
for(N in 1:6)	{

	gamma.tilde <- mvrnorm(n = sims, mu = glm.out$coef, Sigma = vcov(glm.out))

	X.pred[,2:6] <- 0
	if(N > 1)	{ X.pred[,N] <- 1 }

	out[,N] <- apply(pnorm(X.pred %*% t(gamma.tilde)),2,mean)

	cat(N,"\n")

				}

out2 <- matrix(NA,3,6)
out2[1,] <- apply(out,2,mean)
out2[2,] <- apply(out,2,mean) - (apply(out,2,sd) * qnorm(.975))
out2[3,] <- apply(out,2,mean) + (apply(out,2,sd) * qnorm(.975))



pdf(file = paste0("Russia_plot_probabilities_",i,".pdf"))

plot(1:6, out2[1,], ylim = c(min(out2[2,])-.03, max(out2[3,])+.03), xlab = "",
cex.main = 1.2, cex.axis = 1.2, main = paste("Predicted probabilities:",main),
xaxt = "n", ylab = "Probability", pch = 19, cex.lab = 1.2)

for(m in 1:6)	{
lines(x = c(m,m), y = c(out2[2,m],out2[1,m]-.01), lwd = 2)
lines(x = c(m,m), y = c(out2[3,m],out2[1,m]+.01), lwd = 2)
				}

mtext(text = "non-political", side = 1, line = 1, at = 1.5, cex = 1,)
mtext(text = "no CA", side = 1, line = 2, at = 1, cex = 1)
mtext(text = "CA", side = 1, line = 2, at = 2, cex = 1)

mtext(text = "anti-regime", side = 1, line = 1, at = 3.5, cex = 1)
mtext(text = "no CA", side = 1, line = 2, at = 3, cex = 1)
mtext(text = "CA", side = 1, line = 2, at = 4, cex = 1)

mtext(text = "anti-regime (+)", side = 1, line = 1, at = 5.5, cex = 1)
mtext(text = "no CA", side = 1, line = 2, at = 5, cex = 1)
mtext(text = "CA", side = 1, line = 2, at = 6, cex = 1)


abline(v = 4.5, col = "grey60", lwd = 1.5)
abline(v = 2.5, col = "grey60", lwd = 1.5)

dev.off()



##
##  create plots of treatment effects
##
X.pred.0 <- model.matrix(glm.out)
X.pred.1 <- model.matrix(glm.out)

sims <- 10000
out <- matrix(NA,sims,5)
colnames(out) <- paste("Tr", 2:6, sep = "")


set.seed(123)
N <- M <- 1
for(N in 1:5)	{

	X.pred.0[,2:6] <- 0
	X.pred.1[,2:6] <- 0

	X.pred.1[,(N+1)] <- 1

	for(M in 1:sims)	{

		gamma.tilde <- mvrnorm(n = 1, mu = glm.out$coef, Sigma = vcov(glm.out))
		out[M,N] <- mean(pnorm(X.pred.1 %*% gamma.tilde) - pnorm(X.pred.0 %*% gamma.tilde))

						}

	cat(N,"\n")

				}


out2 <- matrix(NA,3,5)
out2[1,] <- apply(out,2,mean)
out2[2,] <- apply(out,2,mean) - (apply(out,2,sd) * qnorm(.975))
out2[3,] <- apply(out,2,mean) + (apply(out,2,sd) * qnorm(.975))



pdf(file = paste0("Russia_plot_effects_",i,".pdf"))

plot(1:5, out2[1,], ylim = c(min(out2[2,])-.03,max(out2[3,])+.03), xlim = c(.75,5.25),
xlab = "", main = paste("Treatment effects:",main),
xaxt = "n", ylab = expression("Treatment effect (change in probability)"),
pch = 19, cex.lab = 1.2, cex.main = 1.2, cex.axis = 1.2)
axis(side = 1, at = 1:6, labels = rep("",6))

for(m in 1:5)	{
lines(x = c(m,m), y = c(out2[2,m],out2[1,m]-.01), lwd = 2)
lines(x = c(m,m), y = c(out2[3,m],out2[1,m]+.01), lwd = 2)
				}

abline(h = 0, lty = 2)


##  Tr 2
mtext(text = "non-political", side = 1, line = 1, at = 1, cex = 1)
mtext(text = "CA", side = 1, line = 2, at = 1, cex = 1)

##  Tr 3
mtext(text = "anti-regime", side = 1, line = 1, at = 2, cex = 1)
mtext(text = "no CA", side = 1, line = 2, at = 2, cex = 1)

##  Tr 4
mtext(text = "anti-regime", side = 1, line = 1, at = 3, cex = 1)
mtext(text = "CA", side = 1, line = 2, at = 3, cex = 1)

##  Tr 5
mtext(text = "anti-regime (+)", side = 1, line = 1, at = 4, cex = 1)
mtext(text = "no CA", side = 1, line = 2, at = 4, cex = 1)

##  Tr 6
mtext(text = "anti-regime (+)", side = 1, line = 1, at = 5, cex = 1)
mtext(text = "CA", side = 1, line = 2, at = 5, cex = 1)


dev.off()


rm(glm.out,gamma.tilde,i,m,M,N,out,out2,X.pred,X.pred.0,X.pred.1)
##  repeat code for all four outcomes...



##  create probit estimates table for paper appendix
tab.probit.new <- tab.probit
m <- n <- 1
for(m in c(1,3,5,7))	{
	for(n in 1:6)			{

	tab.probit.new[n,m:(m+1)] <- hl3(tab.probit.new[n,m:(m+1)],2)

							}
						}

latex(tab.probit.new, file = "")

rm(tab.probit,tab.probit.new,m,n,sims,outcomes,main)



##############################################
##  Investigate treatment effect heterogeneity
##############################################
##
##  plug in different moderators measured at the site, city, or regional level
##
##  let's automate the generation of plots by using a loop

##  estimate percent changes instead of first differences?
##  we do not report percent changes since the skewed MC distribution leads to
##  large CIs and Wald tests with little power.
percent <- FALSE


q <- 1
for(q in 1:23)	{

cat(q,"\n")

if(q == 1)	{
dat.outcomes$M <- dat.outcomes$mr.count.city
legends <- c("Low repression","High repression")
file_name <- "repression_city"
			}

if(q == 2)	{
dat.outcomes$M <- dat.outcomes$mr.count.region
legends <- c("Low repression","High repression")
file_name <- "repression_region"
			}

if(q == 3)	{
##  citizen satisfaction with transparency
dat.outcomes$M <- dat.outcomes$reg_media
legends <- c("Low citizen satisfaction with transparency","High citizen satisfaction with transparency")
file_name <- "transparency_satisfaction"
			}

if(q == 4)	{
##  UR vote shares in 2011
dat.outcomes$M <- dat.outcomes$reg_ur2011
legends <- c("Low 2011 UR vote share","High 2011 UR vote share")
file_name <- "UR2011"
			}

if(q == 5)	{
##  UR vote shares in 2007
dat.outcomes$M <- dat.outcomes$reg_ur2007
legends <- c("Low 2007 UR vote share","High 2007 UR vote share")
file_name <- "UR2007"
			}

if(q == 6)	{
## distance to Moscow
dat.outcomes$M <- dat.outcomes$distance_to_moscow
legends <- c("Low distance to Moscow","High distance to Moscow")
file_name <- "distance_moscow"
			}

if(q == 7)	{
##  Moscow or St. Petersburg
dat.outcomes$M <- dat.outcomes$maincity
legends <- c("Other cities","Moscow or St.Petersburg")
file_name <- "moscow_petersburg"
			}

if(q == 8)	{
##  small sites / big sites
dat.outcomes$M <- dat.outcomes$alexa_reach_rank_percent
legends <- c("Low Alexa reach rank","High Alexa reach rank")
file_name <- "alexa_reach"
			}

if(q == 9)	{
dat.outcomes$M <- as.numeric(as.character(dat.outcomes$alexa_global_rank_percent))
legends <- c("Low Alexa global rank","High Alexa global rank")
file_name <- "alexa_global"
			}

if(q == 10)	{
dat.outcomes$M <- as.numeric(as.character(dat.outcomes$percentage_of_links_from_rus_domains))
legends <- c("Low percentage of links from RUS domains","High percentage of links from RUS domains")
file_name <- "alexa_RUS"
			}



##  geoData
if(q == 11)	{
dat.outcomes$M <- dat.outcomes$speech_imp
legends <- c("Low importance of freedom of speech","High importance of freedom of speech")
file_name <- "importance_free_speech"
			}

if(q == 12)	{
dat.outcomes$M <- dat.outcomes$speech_have
legends <- c("Low existence of freedom of speech","High existence of freedom of speech")
file_name <- "existence_free_speech"
			}

if(q == 13)	{
dat.outcomes$M <- dat.outcomes$info_imp
legends <- c("Low importance of freedom of information","High importance of freedom of information")
file_name <- "importance_free_information"
			}

if(q == 14)	{
dat.outcomes$M <- dat.outcomes$info_have
legends <- c("Low existence of freedom of information","High existence of freedom of information")
file_name <- "existence_free_information"
			}

if(q == 15)	{
dat.outcomes$M <- dat.outcomes$net_news
legends <- c("Low prop. internet users for news","High prop. internet users for news")
file_name <- "internet_news"
			}

if(q == 16)	{
dat.outcomes$M <- dat.outcomes$net_forum
legends <- c("Low prop. social media users","High prop. social media users")
file_name <- "social_media"
			}

if(q == 17)	{
dat.outcomes$M <- (1-dat.outcomes$net_nohome) ## reverse coded
legends <- c("Low prop. with home internet","High prop. with home internet")
file_name <- "home_internet"
			}

if(q == 18)	{
dat.outcomes$M <- dat.outcomes$news_net
legends <- c("Low prop. pred. internet for news","High prop. pred. internet for news")
file_name <- "pred_internet_news"
			}

if(q == 19)	{
dat.outcomes$M <- dat.outcomes$app_medvedev
legends <- c("Low Medvedev approval","High Medvedev approval")
file_name <- "approval_medvedev"
			}

if(q == 20)	{
dat.outcomes$M <- dat.outcomes$app_putin
legends <- c("Low Putin approval","High Putin approval")
file_name <- "approval_putin"
			}

if(q == 21)	{
dat.outcomes$M <- as.logical(dat.outcomes$general)
legends <- c("Specialized news site","General news site")
file_name <- "general"
			}

if(q == 22)	{
dat.outcomes$M <- dat.outcomes$media_pro_speech
legends <- c("Neutral","Media pro free speech")
file_name <- "media_pro_speech"
			}

if(q == 23)	{
dat.outcomes$M <- dat.outcomes$media_anti_speech
legends <- c("Neutral","Media anti free speech")
file_name <- "media_anti_speech"
			}


##  we dichotomize at the sample median to maximize power
if(!is.logical(dat.outcomes$M))		
{ dat.outcomes$M <- dat.outcomes$M > median(dat.outcomes$M, na.rm = TRUE) }

table(dat.outcomes$M)
sum(is.na(dat.outcomes$M))


##  we're restricting the dataset to observations with complete information
##  on a covariate-by-covariate basis
dat.outcomes.lim <- dat.outcomes[!is.na(dat.outcomes$M),]
dim(dat.outcomes.lim)

dat.outcomes.lim$y <- dat.outcomes.lim$acceptprice; main <- "Positive reply"; i <- 1
#dat.outcomes.lim$y <- dat.outcomes.lim$rejectnoreply; main <- "Negative reply"; i <- 2
# dat.outcomes.lim$y <- dat.outcomes.lim$posrequest; main <- "Technical request"; i <- 3
# dat.outcomes.lim$y <- dat.outcomes.lim$negrequest; main <- "Political request"; i <- 4

glm.het <-	glm(y ~ Tr * M, data = dat.outcomes.lim, family = binomial(link = "probit"))
summary(glm.het)



##  create plots of estimated probabilities
X.pred.low <- model.matrix(glm.het)
X.pred.high <- model.matrix(glm.het)

sims <- 10000
out.low <- matrix(NA,sims,6)
out.high <- matrix(NA,sims,6)
colnames(out.low) <- paste("Tr", 1:6, sep = "")
colnames(out.high) <- paste("Tr", 1:6, sep = "")


set.seed(123)
N <- 1
for(N in 1:6)	{

	X.pred.low[,c(2:12)] <- 0
	X.pred.high[,c(2:12)] <- 0

	X.pred.high[,7] <- 1

	X.pred.low[,N] <- 1
	X.pred.high[,N] <- 1

	X.pred.high[,(N+6)] <- 1

	gamma.tilde <- mvrnorm(n = sims, mu = glm.het$coef, Sigma = vcov(glm.het))
	out.low[,N] <- apply(pnorm(X.pred.low %*% t(gamma.tilde)),2,mean)

	gamma.tilde <- mvrnorm(n = sims, mu = glm.het$coef, Sigma = vcov(glm.het))
	out.high[,N] <- apply(pnorm(X.pred.high %*% t(gamma.tilde)),2,mean)

				}

out.low.2 <- matrix(NA,3,6)
out.low.2[1,] <- apply(out.low,2,mean)
out.low.2[2,] <- apply(out.low,2,mean) - (apply(out.low,2,sd) * qnorm(.975))
out.low.2[3,] <- apply(out.low,2,mean) + (apply(out.low,2,sd) * qnorm(.975))

out.high.2 <- matrix(NA,3,6)
out.high.2[1,] <- apply(out.high,2,mean)
out.high.2[2,] <- apply(out.high,2,mean) - (apply(out.high,2,sd) * qnorm(.975))
out.high.2[3,] <- apply(out.high,2,mean) + (apply(out.high,2,sd) * qnorm(.975))



pdf(file = paste0("Russia_plot_probabilities_",file_name,"_",i,".pdf"))

plot(.95:5.95, out.low.2[1,],
ylim = c(min(c(out.low.2[2,],out.high.2[2,]))-.03,max(c(out.low.2[3,],out.high.2[3,]))+.03),
xlim = c(.75,6.25), xlab = "", main = paste("Predicted probabilities:",main), xaxt = "n",
ylab = "Probability", pch = 21, cex.lab = 1.2, cex.main = 1.2, cex.axis = 1.2)

axis(side = 1, at = 1:6, labels = rep("",6))


for(m in 1:6)	{
lines(x = c(m-.05,m-.05), y = c(out.low.2[2,m],out.low.2[1,m]-.02), lwd = 1.5)
lines(x = c(m-.05,m-.05), y = c(out.low.2[3,m],out.low.2[1,m]+.02), lwd = 1.5)
				}

points((.95:5.95)+.1, out.high.2[1,], pch = 19, col = "black")

for(m in 1:6)	{
	lines(x = c(m+.05,m+.05), y = c(out.high.2[2,m],out.high.2[1,m]-.02), lwd = 1.5)
	lines(x = c(m+.05,m+.05), y = c(out.high.2[3,m],out.high.2[1,m]+.02), lwd = 1.5)
				}


mtext(text = "non-political", side = 1, line = 1, at = 1.5, cex = 1,)
mtext(text = "no CA", side = 1, line = 2, at = 1, cex = 1)
mtext(text = "CA", side = 1, line = 2, at = 2, cex = 1)

mtext(text = "anti-regime", side = 1, line = 1, at = 3.5, cex = 1)
mtext(text = "no CA", side = 1, line = 2, at = 3, cex = 1)
mtext(text = "CA", side = 1, line = 2, at = 4, cex = 1)

mtext(text = "anti-regime (+)", side = 1, line = 1, at = 5.5, cex = 1)
mtext(text = "no CA", side = 1, line = 2, at = 5, cex = 1)
mtext(text = "CA", side = 1, line = 2, at = 6, cex = 1)


abline(v = 4.5, col = "grey60", lwd = 1.5)
abline(v = 2.5, col = "grey60", lwd = 1.5)


points(2.7,max(c(out.low.2[3,],out.high.2[3,])+.02), pch = 21)
points(2.7,max(c(out.low.2[3,],out.high.2[3,])-.01), pch = 19)

text(x = 2.75, y = max(c(out.low.2[3,],out.high.2[3,])+.02), legends[1], pos = 4)
text(x = 2.75, y = max(c(out.low.2[3,],out.high.2[3,])-.01), legends[2], pos = 4)


dev.off()



##
##  simulate difference in treatment effects between low and high levels of moderator
## 
X.pred.low.0 <- model.matrix(glm.het)
X.pred.low.1 <- model.matrix(glm.het)
X.pred.high.0 <- model.matrix(glm.het)
X.pred.high.1 <- model.matrix(glm.het)

sims <- 10000
out.low <- matrix(NA,sims,5)
out.high <- matrix(NA,sims,5)

colnames(out.low) <- paste("Tr", 2:6, sep = "")
colnames(out.high) <- paste("Tr", 2:6, sep = "")


set.seed(123)
N <- M <- 1
for(N in 1:5)	{

	X.pred.low.0[,c(2:12)] <- 0
	X.pred.low.1[,c(2:12)] <- 0
	X.pred.high.0[,c(2:12)] <- 0
	X.pred.high.1[,c(2:12)] <- 0

	X.pred.high.0[,7] <- 1
	X.pred.high.1[,7] <- 1

	X.pred.low.1[,(N+1)] <- 1

	X.pred.high.1[,(N+1)] <- 1
	X.pred.high.1[,(N+7)] <- 1


	for(M in 1:sims)	{

		if(percent == FALSE)	{
		gamma.tilde <- mvrnorm(n = 1, mu = glm.het$coef, Sigma = vcov(glm.het))
		out.low[M,N]  <- mean(pnorm(X.pred.low.1 %*% gamma.tilde) - pnorm(X.pred.low.0 %*% gamma.tilde))

		gamma.tilde <- mvrnorm(n = 1, mu = glm.het$coef, Sigma = vcov(glm.het))
		out.high[M,N] <- mean(pnorm(X.pred.high.1 %*% gamma.tilde) - pnorm(X.pred.high.0 %*% gamma.tilde))
								} else
								{
		gamma.tilde <- mvrnorm(n = 1, mu = glm.het$coef, Sigma = vcov(glm.het))
		out.low[M,N]  <- mean((pnorm(X.pred.low.1 %*% gamma.tilde) - pnorm(X.pred.low.0 %*% gamma.tilde))/pnorm(X.pred.low.0 %*% gamma.tilde) ) * 100

		gamma.tilde <- mvrnorm(n = 1, mu = glm.het$coef, Sigma = vcov(glm.het))
		out.high[M,N] <- mean((pnorm(X.pred.high.1 %*% gamma.tilde) - pnorm(X.pred.high.0 %*% gamma.tilde))/pnorm(X.pred.high.0 %*% gamma.tilde) ) * 100
								}

						}

				}

out.low2 <- matrix(NA,3,5)
out.low2[1,] <- apply(out.low,2,mean)
out.low2[2,] <- apply(out.low,2,mean) - (apply(out.low,2,sd) * qnorm(.975))
out.low2[3,] <- apply(out.low,2,mean) + (apply(out.low,2,sd) * qnorm(.975))

out.high2 <- matrix(NA,3,5)
out.high2[1,] <- apply(out.high,2,mean)
out.high2[2,] <- apply(out.high,2,mean) - (apply(out.high,2,sd) * qnorm(.975))
out.high2[3,] <- apply(out.high,2,mean) + (apply(out.high,2,sd) * qnorm(.975))



pdf(file = paste0("Russia_plot_",file_name,"_",i,"_",percent,".pdf"))

plot(.95:4.95, out.low2[1,],
ylim = c(min(c(out.low2[2,],out.high2[2,]))-.03,max(c(out.low2[3,],out.high2[3,]))+.03),
xlim = c(.75,5.25), xlab = "", main = "Treatment effect moderation",
xaxt = "n", pch = 21, cex.lab = 1.2, cex.main = 1.2, cex.axis = 1.2,
ylab = ifelse(percent, expression("Treatment effect (percentage change)"),
expression("Treatment effect (change in probability)")))

axis(side = 1, at = 1:6, labels = rep("",6))

mtext(text = main, side = 3, line = .4, at = 3, cex = 1.2)


for(m in 1:5)	{
	lines(x = c(m-.05,m-.05), y = c(out.low2[2,m],out.low2[1,m]-.02), lwd = 1.5)
	lines(x = c(m-.05,m-.05), y = c(out.low2[3,m],out.low2[1,m]+.02), lwd = 1.5)
				}


points(1.05:5.05, out.high2[1,], pch = 19, col = "black")

for(m in 1:5)	{
	lines(x = c(m+.05,m+.05), y = c(out.high2[2,m],out.high2[1,m]-.02), lwd = 1.5)
	lines(x = c(m+.05,m+.05), y = c(out.high2[3,m],out.high2[1,m]+.02), lwd = 1.5)
				}

abline(h = 0, lty = 2)


##  Tr 2
mtext(text = "non-political", side = 1, line = 1, at = 1, cex = 1)
mtext(text = "CA", side = 1, line = 2, at = 1, cex = 1)

##  Tr 3
mtext(text = "anti-regime", side = 1, line = 1, at = 2, cex = 1)
mtext(text = "no CA", side = 1, line = 2, at = 2, cex = 1)

##  Tr 4
mtext(text = "anti-regime", side = 1, line = 1, at = 3, cex = 1)
mtext(text = "CA", side = 1, line = 2, at = 3, cex = 1)

##  Tr 5
mtext(text = "anti-regime (+)", side = 1, line = 1, at = 4, cex = 1)
mtext(text = "no CA", side = 1, line = 2, at = 4, cex = 1)

##  Tr 6
mtext(text = "anti-regime (+)", side = 1, line = 1, at = 5, cex = 1)
mtext(text = "CA", side = 1, line = 2, at = 5, cex = 1)


points(2.5, max(c(out.low2[3,],out.high2[3,])+.02), pch = 21)
points(2.5, max(c(out.low2[3,],out.high2[3,])-.01), pch = 19)

text(x = 2.55, y = max(c(out.low2[3,],out.high2[3,])+.02), legends[1], pos = 4)
text(x = 2.55, y = max(c(out.low2[3,],out.high2[3,])-.01), legends[2], pos = 4)



##  Wald test to check if heterogeneity is statistically significant
out.temp <- cbind(out.low, out.high)
V <- var(out.temp)
Q <- matrix(0,5,10)
Q[1,1] <- 1
Q[1,6] <- -1
Q[2,2] <- 1
Q[2,7] <- -1
Q[3,3] <- 1
Q[3,8] <- -1
Q[4,4] <- 1
Q[4,9] <- -1
Q[5,5] <- 1
Q[5,10] <- -1
Q

r <- rep(0,5)
theta <- apply(out.temp,2,mean)

## compute Wald test statistic
W <- t(Q %*% theta - r) %*% solve(Q %*% V %*% t(Q)) %*% (Q %*% theta - r)

## compare to Chi^2 distribution to get p-value
round(1 - pchisq(W, df = nrow(Q)),2)


##  add Wald test results to plot
text(1.6, min(c(out.low2[2,],out.high2[2,]))-.03,
substitute(paste("Wald test: ", Chi[5]^2, " = ", v, "; ", italic(p), " = ", pp),
list(v = round(W,2), pp = round(1 - pchisq(W, df = nrow(Q)),2))))

dev.off()

rm(dat.outcomes.lim,file_name,gamma.tilde,glm.het,i,legends,m,M,main,N)
rm(out.high,out.high2,out.low,out.low2,out.high.2,out.low.2,out.temp,Q,r,theta,V,W)
rm(X.pred.high,X.pred.low,X.pred.high.0,X.pred.high.1,X.pred.low.0,X.pred.low.1)

	}



##
##  Create balance table
##
tab.bal <- matrix(NA,11,7)
rownames(tab.bal) <- c(	"Regional United Russia vote share 2007",
						"Regional United Russia vote share 2011",
						"Distance to Moscow (km)",
						"Moscow or Saint Petersburg",
						"General news site",
						"Media bias: pro-free speech",
						"Media bias: neutral",
						"Media bias: anti-free speech",
						"Putin approval (FOM)",
						"Medvedev approval (FOM)",
						"Incidents of repression against journalists (GDF)")

covars <- c(	"reg_ur2007",
				"reg_ur2011",
				"distance_to_moscow",
				"maincity",
				"general",
				"media_pro_speech",
				"media_neutral_speech",
				"media_anti_speech",
				"app_putin",
				"app_medvedev",
				"mr.count.region")


##  compute observed balance statistics
##  (dropping obs with missing values on a variable-by-variable basis)
m <- 1
for(m in 1:nrow(tab.bal))	{

	k <- 1
	for(k in 1:6)	{	
		tab.bal[m,k] <- mean(dat.outcomes[covars[m]][dat.outcomes$Tr == k,], na.rm = TRUE)
					}

							}

round(tab.bal,2)


##  randomization inference balance test following Young (2019)
k <- 1
for(k in 1:nrow(tab.bal))	{

	x <- unlist(dat.outcomes[covars[k]])

	##  observed test statistic
	temp <- lm(x ~ as.factor(dat.outcomes$Tr))
	stat.obs <- as.vector(temp$coef[-1] %*% temp$coef[-1])
	stat.rand <- rep(NA,100000)

	m <- 1
	for(m in 1:100000)	{

		temp <- lm(x ~ as.factor(Tr.rand[,m]))
		stat.rand[m] <- temp$coef[-1] %*% temp$coef[-1]
		if(m %% 10000 == 0)	{ cat(m, "\n") }

						}

	tab.bal[k,7] <- sum(stat.rand > stat.obs) / length(stat.rand)
	cat(k,"\n","\n")

							}

round(tab.bal,2)
latex(round(tab.bal,2), file = "")


##  joint randomization test over all outcomes following Young 2019
	temp1  <- lm(unlist(dat.outcomes[covars[1]])  ~ as.factor(dat.outcomes$Tr))
	temp2  <- lm(unlist(dat.outcomes[covars[2]])  ~ as.factor(dat.outcomes$Tr))
	temp3  <- lm(unlist(dat.outcomes[covars[3]])  ~ as.factor(dat.outcomes$Tr))
	temp4  <- lm(unlist(dat.outcomes[covars[4]])  ~ as.factor(dat.outcomes$Tr))
	temp5  <- lm(unlist(dat.outcomes[covars[5]])  ~ as.factor(dat.outcomes$Tr))
	temp6  <- lm(unlist(dat.outcomes[covars[6]])  ~ as.factor(dat.outcomes$Tr))
	temp7  <- lm(unlist(dat.outcomes[covars[7]])  ~ as.factor(dat.outcomes$Tr))
	temp8  <- lm(unlist(dat.outcomes[covars[8]])  ~ as.factor(dat.outcomes$Tr))
	temp9  <- lm(unlist(dat.outcomes[covars[9]])  ~ as.factor(dat.outcomes$Tr))
	temp10 <- lm(unlist(dat.outcomes[covars[10]]) ~ as.factor(dat.outcomes$Tr))
	temp11 <- lm(unlist(dat.outcomes[covars[11]]) ~ as.factor(dat.outcomes$Tr))

stat.obs <- 	c(	temp1$coef[-1],
					temp2$coef[-1],
					temp3$coef[-1],
					temp4$coef[-1],
					temp5$coef[-1],
					temp6$coef[-1],
					temp7$coef[-1],
					temp8$coef[-1],
					temp9$coef[-1],
					temp10$coef[-1],
					temp11$coef[-1])

stat.rand <- matrix(NA,100000,length(stat.obs))


m <- 1
for(m in 1:100000)	{

	temp1  <- lm(unlist(dat.outcomes[covars[1]])  ~ as.factor(Tr.rand[,m]))
	temp2  <- lm(unlist(dat.outcomes[covars[2]])  ~ as.factor(Tr.rand[,m]))
	temp3  <- lm(unlist(dat.outcomes[covars[3]])  ~ as.factor(Tr.rand[,m]))
	temp4  <- lm(unlist(dat.outcomes[covars[4]])  ~ as.factor(Tr.rand[,m]))
	temp5  <- lm(unlist(dat.outcomes[covars[5]])  ~ as.factor(Tr.rand[,m]))
	temp6  <- lm(unlist(dat.outcomes[covars[6]])  ~ as.factor(Tr.rand[,m]))
	temp7  <- lm(unlist(dat.outcomes[covars[7]])  ~ as.factor(Tr.rand[,m]))
	temp8  <- lm(unlist(dat.outcomes[covars[8]])  ~ as.factor(Tr.rand[,m]))
	temp9  <- lm(unlist(dat.outcomes[covars[9]])  ~ as.factor(Tr.rand[,m]))
	temp10 <- lm(unlist(dat.outcomes[covars[10]]) ~ as.factor(Tr.rand[,m]))
	temp11 <- lm(unlist(dat.outcomes[covars[11]]) ~ as.factor(Tr.rand[,m]))

	stat.rand[m,] <- c( temp1$coef[-1],
						temp2$coef[-1],
						temp3$coef[-1],
						temp4$coef[-1],
				  		temp5$coef[-1],
				  		temp6$coef[-1],
				  		temp7$coef[-1],
				  		temp8$coef[-1],
				  		temp9$coef[-1],
				  		temp10$coef[-1],
				  		temp11$coef[-1])

	if(m %% 1000 == 0)	{ cat(m, "\n") }

					}

V <- var(stat.rand)
V2 <- ginv(V)

stat.obs2 <- as.vector(t(stat.obs) %*% V2 %*% stat.obs)

stat.rand2 <- rep(NA,100000)
m <- 1
for(m in 1:100000)	{	stat.rand2[m] <- t(stat.rand[m,]) %*% V2 %*% stat.rand[m,]	}

round(sum(stat.rand2 >= stat.obs2) / length(stat.rand2),2)

rm(stat.rand,stat.rand2, stat.obs,stat.obs2,m,V,V2)
rm(temp1,temp2,temp3,temp4,temp5,temp6,temp7,temp8,temp9,temp10,temp11,temp,k,x)
rm(sims,percent,q,tab.bal)



######
## OLS
######
tab.ols <- matrix(NA,13,12)
##  4 models with three specifications each: no covariates, covariates,
##  and covariates + blocks dummies

## acceptprice, no predictors
lm.out <- lm(acceptprice ~		Tr,
								data = dat.outcomes)

summary(lm.out)

se <- sqrt(diag(vcovHC(lm.out, type = "HC2")))
stopifnot(!is.na(se))
tab.ols[c(1,3,5,7,9,11),1] <- summary(lm.out)$coef[1:6,1]
tab.ols[c(2,4,6,8,10,12),1] <- se[1:6]
tab.ols[13,1] <- nrow(lm.out$model)


## acceptprice, covariates
lm.out <- lm(acceptprice ~ 		Tr + 
								reg_ur2007 +
								reg_ur2011 +
								distance_to_moscow +
								maincity +
								general +
								media_pro_speech +
								media_neutral_speech +
								app_putin +
								app_medvedev +
								mr.count.region,
								data = dat.outcomes)

summary(lm.out)

se <- sqrt(diag(vcovHC(lm.out, type = "HC2")))
stopifnot(!is.na(se))
tab.ols[c(1,3,5,7,9,11),2] <- summary(lm.out)$coef[1:6,1]
tab.ols[c(2,4,6,8,10,12),2] <- se[1:6]
tab.ols[13,2] <- nrow(lm.out$model)


## acceptprice, covariates and block dummies
lm.out <- lm(acceptprice ~		Tr + 
								reg_ur2007 +
								reg_ur2011 +
								distance_to_moscow +
								maincity +
								general +
								media_pro_speech +
								media_neutral_speech +
								app_putin +
								app_medvedev +
								mr.count.region +
								as.factor(block),
								data = dat.outcomes)

summary(lm.out)

se <- sqrt(diag(vcovHC(lm.out, type = "HC2")))
stopifnot(!is.na(se))
tab.ols[c(1,3,5,7,9,11),3] <- summary(lm.out)$coef[1:6,1]
tab.ols[c(2,4,6,8,10,12),3] <- se[1:6]
tab.ols[13,3] <- nrow(lm.out$model)



## rejectnoreply, no predictors
lm.out <- lm(rejectnoreply ~ 	Tr,
								data = dat.outcomes)

summary(lm.out)

se <- sqrt(diag(vcovHC(lm.out, type = "HC2")))
stopifnot(!is.na(se))
tab.ols[c(1,3,5,7,9,11),4] <- summary(lm.out)$coef[1:6,1]
tab.ols[c(2,4,6,8,10,12),4] <- se[1:6]
tab.ols[13,4] <- nrow(lm.out$model)


## rejectnoreply, covariates
lm.out <- lm(rejectnoreply ~ 	Tr + 
								reg_ur2007 +
								reg_ur2011 +
								distance_to_moscow +
								maincity +
								general +
								media_pro_speech +
								media_neutral_speech +
								app_putin +
								app_medvedev +
								mr.count.region,
								data = dat.outcomes)

summary(lm.out)

se <- sqrt(diag(vcovHC(lm.out, type = "HC2")))
stopifnot(!is.na(se))
tab.ols[c(1,3,5,7,9,11),5] <- summary(lm.out)$coef[1:6,1]
tab.ols[c(2,4,6,8,10,12),5] <- se[1:6]
tab.ols[13,5] <- nrow(lm.out$model)


## rejectnoreply, covariates and block dummies
lm.out <- lm(rejectnoreply ~ 	Tr + 
								reg_ur2007 +
								reg_ur2011 +
								distance_to_moscow +
								maincity +
								general +
								media_pro_speech +
								media_neutral_speech +
								app_putin +
								app_medvedev +
								mr.count.region +
								as.factor(block),
								data = dat.outcomes)

summary(lm.out)

se <- sqrt(diag(vcovHC(lm.out, type = "HC2")))
stopifnot(!is.na(se))
tab.ols[c(1,3,5,7,9,11),6] <- summary(lm.out)$coef[1:6,1]
tab.ols[c(2,4,6,8,10,12),6] <- se[1:6]
tab.ols[13,6] <- nrow(lm.out$model)



##  posrequest, no predictors
lm.out <- lm(posrequest ~		Tr,
								data = dat.outcomes)

summary(lm.out)

se <- sqrt(diag(vcovHC(lm.out, type = "HC2")))
stopifnot(!is.na(se))
tab.ols[c(1,3,5,7,9,11),7] <- summary(lm.out)$coef[1:6,1]
tab.ols[c(2,4,6,8,10,12),7] <- se[1:6]
tab.ols[13,7] <- nrow(lm.out$model)


##  posrequest, covariates
lm.out <- lm(posrequest ~ 		Tr + 
								reg_ur2007 +
								reg_ur2011 +
								distance_to_moscow +
								maincity +
								general +
								media_pro_speech +
								media_neutral_speech +
								app_putin +
								app_medvedev +
								mr.count.region,
								data = dat.outcomes)

summary(lm.out)

se <- sqrt(diag(vcovHC(lm.out, type = "HC2")))
stopifnot(!is.na(se))
tab.ols[c(1,3,5,7,9,11),8] <- summary(lm.out)$coef[1:6,1]
tab.ols[c(2,4,6,8,10,12),8] <- se[1:6]
tab.ols[13,8] <- nrow(lm.out$model)


##  posrequest, covariates and block dummies
lm.out <- lm(posrequest ~ 		Tr + 
								reg_ur2007 +
								reg_ur2011 +
								distance_to_moscow +
								maincity +
								general +
								media_pro_speech +
								media_neutral_speech +
								app_putin +
								app_medvedev +
								mr.count.region +
								as.factor(block),
								data = dat.outcomes)

summary(lm.out)

se <- sqrt(diag(vcovHC(lm.out, type = "HC2")))
stopifnot(!is.na(se))
tab.ols[c(1,3,5,7,9,11),9] <- summary(lm.out)$coef[1:6,1]
tab.ols[c(2,4,6,8,10,12),9] <- se[1:6]
tab.ols[13,9] <- nrow(lm.out$model)


##  negrequest, no predictors
lm.out <- lm(negrequest ~		Tr,
								data = dat.outcomes)

summary(lm.out)

se <- sqrt(diag(vcovHC(lm.out, type = "HC2")))
stopifnot(!is.na(se))
tab.ols[c(1,3,5,7,9,11),10] <- summary(lm.out)$coef[1:6,1]
tab.ols[c(2,4,6,8,10,12),10] <- se[1:6]
tab.ols[13,10] <- nrow(lm.out$model)


##  negrequest, covariates
lm.out <- lm(negrequest ~ 		Tr + 
								reg_ur2007 +
								reg_ur2011 +
								distance_to_moscow +
								maincity +
								general +
								media_pro_speech +
								media_neutral_speech +
								app_putin +
								app_medvedev +
								mr.count.region,
								data = dat.outcomes)

summary(lm.out)

se <- sqrt(diag(vcovHC(lm.out, type = "HC2")))
stopifnot(!is.na(se))
tab.ols[c(1,3,5,7,9,11),11] <- summary(lm.out)$coef[1:6,1]
tab.ols[c(2,4,6,8,10,12),11] <- se[1:6]
tab.ols[13,11] <- nrow(lm.out$model)


##  negrequest, covariates and block dummies
lm.out <- lm(negrequest ~ 		Tr + 
								reg_ur2007 +
								reg_ur2011 +
								distance_to_moscow +
								maincity +
								general +
								media_pro_speech +
								media_neutral_speech +
								app_putin +
								app_medvedev +
								mr.count.region +
								as.factor(block),
								data = dat.outcomes)

summary(lm.out)

se <- sqrt(diag(vcovHC(lm.out, type = "HC2")))
stopifnot(!is.na(se))
tab.ols[c(1,3,5,7,9,11),12] <- summary(lm.out)$coef[1:6,1]
tab.ols[c(2,4,6,8,10,12),12] <- se[1:6]
tab.ols[13,12] <- nrow(lm.out$model)


round(tab.ols,2)

tab.ols.new <- tab.ols
m <- n <- 1
for(m in 1:12)	{
	for(n in c(1,3,5,7,9,11))	{

	tab.ols.new[n:(n+1),m] <- hl3(tab.ols.new[n:(n+1),m],2)

								}
				}

tab.ols.new <- rbind(tab.ols.new,NA,NA)
tab.ols.new[14,] <- rep(c("no","yes","yes"), times = 4)
tab.ols.new[15,] <- rep(c("no","no","yes"), times = 4)
rownames(tab.ols.new) <- 1:15

latex(tab.ols.new, file = "")



##  correlation plot of covariates
##  and table of summary statistics
temp <- 
cbind(
dat.outcomes[,covars[1]],
dat.outcomes[,covars[2]],
dat.outcomes[,covars[3]],
dat.outcomes[,covars[4]],
dat.outcomes[,covars[5]],
dat.outcomes[,covars[6]],
dat.outcomes[,covars[7]],
dat.outcomes[,covars[8]],
dat.outcomes[,covars[9]],
dat.outcomes[,covars[10]],
dat.outcomes[,covars[11]])

colnames(temp) <- 1:11

pdf(file = "correlationplot.pdf")
corrplot(cor(temp, use = "pairwise.complete.obs"), method = "circle")
dev.off()



##  descriptive statistics
covars.tab <- data.frame(matrix(NA,length(covars),6))
colnames(covars.tab) <- c("covariate","mean","sd","min","max","% missing")
rownames(covars.tab) <- 1:11

covars.tab[,1] <- c("Regional United Russia vote share 2007",
					"Regional United Russia vote share 2011",
					"Distance to Moscow (km)",
					"Moscow or Saint Petersburg",
					"General news site",
					"Media bias: pro-free speech",
					"Media bias: neutral",
					"Media bias: anti-free speech",
					"Putin approval",
					"Medvedev approval",
					"Incidents of repression against journalists")


m <- 1
for(m in 1:length(covars))	{

	covars.tab[m,2] <- round(mean(unlist(dat.outcomes[covars[m]]),na.rm = TRUE),2)
	covars.tab[m,3] <- round(sd(unlist(dat.outcomes[covars[m]]),na.rm = TRUE),2)
	covars.tab[m,4] <- round(min(unlist(dat.outcomes[covars[m]]),na.rm = TRUE),2)
	covars.tab[m,5] <- round(max(unlist(dat.outcomes[covars[m]]),na.rm = TRUE),2)
	covars.tab[m,6] <- round(mean(is.na(unlist(dat.outcomes[covars[m]])),na.rm = TRUE),2)

							}

latex(covars.tab, file = "")




sessionInfo()

.
